import os
import gensim
import sys


class MySentences(object):
    """Stream whitespace-tokenized lines from every entry of a directory.

    ``__iter__`` is a generator function, so every ``iter()`` call starts a
    fresh pass over the directory; the object can therefore be iterated
    more than once.
    """

    def __init__(self, dir_name):
        """Remember the directory to read.

        Args:
            dir_name: Path of the directory whose entries are opened and
                read each time the object is iterated.
        """
        self.dir_name = dir_name

    def __iter__(self):
        """Yield one list of tokens per line, file by file.

        Entries are visited in the order ``os.listdir`` returns them. Each
        line is split with ``str.split()``, so a blank line yields ``[]``.

        Yields:
            list[str]: The whitespace-separated tokens of one line.
        """
        for f_name in os.listdir(self.dir_name):
            # Use a context manager so each file is closed deterministically
            # (also when the consumer abandons the generator mid-file)
            # instead of leaking the handle until garbage collection.
            with open(os.path.join(self.dir_name, f_name)) as f:
                for line in f:
                    yield line.split()


if __name__ == '__main__':
    # Command line: <script> TEXT_DIR MODEL_FILE
    #   TEXT_DIR   - directory whose files are read line by line as sentences
    #   MODEL_FILE - path the trained model is saved to
    # Fail with a usage message rather than a bare IndexError traceback
    # when an argument is missing; extra arguments are still ignored.
    if len(sys.argv) < 3:
        sys.exit('usage: {} TEXT_DIR MODEL_FILE'.format(sys.argv[0]))
    text_dir = sys.argv[1]
    model_file = sys.argv[2]
    sentences = MySentences(text_dir)
    # NOTE(review): `size` is the keyword used by older gensim releases;
    # newer ones appear to call it `vector_size` - confirm against the
    # installed gensim version before changing it.
    model = gensim.models.Word2Vec(sentences, window=8, min_count=5, size=300, workers=6)
    model.save(model_file)
